package org.biogroovy.io

import java.io.IOException;
import java.net.URL;
import java.util.List;

import groovy.util.slurpersupport.GPathResult
import groovy.util.slurpersupport.NodeChild

import org.biogroovy.models.Article
import org.htmlcleaner.CleanerProperties
import org.htmlcleaner.HtmlCleaner
import org.htmlcleaner.SimpleXmlSerializer
import org.htmlcleaner.TagNode

/**
 * Parses the metadata found in the {@code <head>} of an HTML document and
 * returns it as an {@link Article}.
 *
 * The raw HTML is first tidied into well-formed XML with HtmlCleaner, then
 * navigated with an XmlSlurper. Three pieces of metadata are read: the
 * document title, the {@code abstract} meta tag and the {@code keywords}
 * meta tag (a semicolon-separated list).
 *
 * The remote-fetching operations (fetch, fetchAll, getUrl) are unimplemented
 * stubs in this class.
 */
class HTMLMetadataSlurper extends AbsXmlSlurper<Article> {

	/**
	 * Copies the title, abstract and keywords from the document head into the article.
	 *
	 * @param article the article to populate; keywords are appended to its existing keyword collection
	 * @param node the root ({@code html}) node of the cleaned document
	 */
	@Override
	public void parse(Article article, NodeChild node) {
		// text() is used for every field so plain Strings are stored rather than GPath results.
		article.title = node.head.title.text()
		article.abs = node.head.meta.find { it['@name'] == 'abstract' }.@content.text()

		// A missing meta tag yields an empty GPath result whose text() is "".
		String keywordTxt = node.head.meta.find { it['@name'] == 'keywords' }.@content.text()
		keywordTxt.split(";").each { String keyword ->
			String trimmed = keyword.trim()
			// "".split(";") returns [""], and "a;;b" contains an empty token:
			// skip blanks so that no empty keyword is ever recorded.
			if (!trimmed.isEmpty()) {
				article.keywords.add(trimmed)
			}
		}
	}

	/**
	 * Reads an HTML document from the stream and extracts its metadata.
	 * The stream is not closed by this method.
	 *
	 * @param inputStream the stream containing the HTML document
	 * @return a new article populated by {@link #parse(Article, NodeChild)}
	 * @throws IOException if the stream cannot be read
	 */
	@Override
	public Article read(InputStream inputStream) throws IOException {
		// Real-world HTML is rarely well-formed XML, so it is cleaned and
		// re-serialized as XML before being handed to the slurper.
		HtmlCleaner cleaner = new HtmlCleaner()
		TagNode cleanedDocument = cleaner.clean(inputStream)

		CleanerProperties props = cleaner.getProperties()
		SimpleXmlSerializer serializer = new SimpleXmlSerializer(props)
		String xml = serializer.getAsString(cleanedDocument)

		// NOTE(review): createSlurper() is not defined in this class; presumably inherited from AbsXmlSlurper.
		XmlSlurper slurper = createSlurper()
		GPathResult root = slurper.parseText(xml)

		Article article = new Article()
		parse(article, root)
		return article
	}

	/**
	 * Since you can only parse a single HTML document, a list does not really make sense here.
	 * This method simply parses the metadata from the input stream and returns a list
	 * containing only that single article.
	 *
	 * @param inputStream the stream containing the HTML document
	 * @return a mutable list holding exactly one article
	 * @throws IOException if the stream cannot be read
	 */
	@Override
	public List<Article> readList(InputStream inputStream) throws IOException {
		List<Article> articleList = new ArrayList<>()
		articleList.add(read(inputStream))
		return articleList
	}

	/**
	 * Not implemented: this method currently does nothing.
	 *
	 * @param root the root node of the parsed document (unused)
	 * @param node the article being populated (unused)
	 */
	@Override
	protected void parseDbReferences(NodeChild root, Article node) {
		// TODO not implemented
	}

	/**
	 * Not implemented: always returns {@code null}.
	 *
	 * @param id the identifier of the article (unused)
	 * @return {@code null}
	 */
	@Override
	public Article fetch(String id) throws IOException {
		// TODO not implemented
		return null
	}

	/**
	 * Not implemented: always returns {@code null}.
	 *
	 * @param id the identifier of the article (unused)
	 * @param paramMap additional parameters (unused)
	 * @return {@code null}
	 */
	@Override
	public URL getUrl(String id, Map<String, String> paramMap) {
		// TODO not implemented
		return null
	}

	/**
	 * Not implemented: always returns {@code null}.
	 *
	 * @param id the identifier of the articles (unused)
	 * @return {@code null}
	 */
	@Override
	public List<Article> fetchAll(String id) throws IOException {
		// TODO not implemented
		return null
	}

	/**
	 * Creates a fresh instance of this slurper.
	 *
	 * @return a new {@code HTMLMetadataSlurper}
	 */
	@Override
	public IFetcher<Article> getNewInstance() {
		return new HTMLMetadataSlurper()
	}

}
